# -*- coding: utf-8 -*-
import pandas
import datetime
import sys
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.datasets import fetch_20newsgroups
from time import time

# Python 2 only: switch the interpreter-wide default string encoding to UTF-8.
# sys.setdefaultencoding is removed from the sys module at startup, so the
# module has to be reloaded to get it back before it can be called.
# Python 3 has no builtin reload() and already defaults to UTF-8, so the hack
# is skipped there instead of failing with NameError at import time.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2 only
    sys.setdefaultencoding('utf-8')

# Training-data directory. Written as a raw string so the Windows backslashes
# are taken literally rather than depending on unrecognised escapes such as
# "\S" and "\A" being passed through unchanged (which warns on newer Pythons).
# The resulting value is identical to the original literal.
root = r"E:\SourceCode\Algorithm\个人征信\train"

# Load the whitespace-separated text file into a DataFrame.
#   header=None      : the file has no column-name row; names can be set later
#   encoding='gb2312': other encodings display the Chinese text incorrectly
#   sep=r'\s+'       : split each line on runs of whitespace (documented
#                      equivalent of the deprecated delim_whitespace=True)
#   index_col=0      : use the first column as the index
bank_detail = pandas.read_table(
    'Z:/test.txt',
    header=None,
    encoding='gb2312',
    sep=r'\s+',
    index_col=0,
)